pacman::p_load("tmap", "ExPanDaR", "kableExtra", "ggstatsplot", "tidyverse")EDA: Overview of Suicide Rate in a Particular Year
Step-by-Step Data Preparation
1. Installing and launching required R packages
2. Loading the data
suicidedata_eda_formap <- read_csv("data/suicidedata_eda_formap.csv", show_col_types = FALSE)suicidedata_eda <- read_csv("data/suicidedata_eda.csv", show_col_types = FALSE)3. Dataset overview
We will check the suicidedata_eda as suicidedata_eda_formap is meant to be combined with the sf file (world)
3.1 Missing Values
missing.values <- suicidedata_eda %>%
gather(key = "key", value = "val") %>%
mutate(is.missing = is.na(val)) %>%
group_by(key, is.missing) %>%
summarise(num.missing = n()) %>%
filter(is.missing==T) %>%
select(-is.missing) %>%
arrange(desc(num.missing))
missing.values %>% kable()| key | num.missing |
|---|---|
| DN | 18360 |
| DR | 18360 |
| POP | 18360 |
| SN | 18360 |
| SP | 18360 |
Visualising Missing Values
missing_values <- function(vari = "age_name"){
prepare_missing_values_graph(suicidedata_eda, ts_id = vari)
}By country
missing_values("country")
By gender
missing_values("sex_name")
By age group
missing_values("age_name")
By year
missing_values("year")
Conclusion : The missing values are because of age-standardised (only for Suicide Rates)
3.2 Descriptive statistics
descr_stats <- function(year, gender = "Both", age = "All ages") {
descr <- prepare_descriptive_table(suicidedata_eda %>%
filter(year == year,
sex_name == gender,
age_name == age) %>%
select(!c(6,8)) %>%
rename("Number of suicide" = "SN",
"Number of deaths" = "DN",
"Share of deaths from suicide (%)" = "SP",
"Suicide rate" = "SR",
"Mortality rate" = "DR"))
descr$kable_ret %>%
kable_styling("condensed", full_width = F, position = "center")
}descr_stats(2010, "Both", "All ages")| N | Mean | Std. dev. | Min. | 25 % | Median | 75 % | Max. | |
|---|---|---|---|---|---|---|---|---|
| Number of suicide | 6,120 | 3,874.828 | 18,425.617 | 0.126 | 93.934 | 532.835 | 1,882.305 | 220,356.720 |
| Number of deaths | 6,120 | 252,265.821 | 920,824.636 | 10.886 | 7,358.955 | 51,487.069 | 169,570.300 | 10,659,491.040 |
| Share of deaths from suicide (%) | 6,120 | 1.394 | 1.067 | 0.105 | 0.691 | 1.118 | 1.787 | 11.812 |
| Suicide rate | 6,120 | 11.339 | 9.395 | 1.427 | 5.424 | 8.224 | 14.601 | 95.565 |
| Mortality rate | 6,120 | 847.052 | 354.531 | 148.258 | 611.956 | 783.986 | 1,019.318 | 10,705.808 |
4. Plotting the choropleth map
4.1 Joining with world map (tmap object)
The object World is a spatial object of class sf from the sf package; it is a data.frame with a special column that contains a geometry for each row, in this case polygons
Reference - https://cran.r-project.org/web/packages/tmap/vignettes/tmap-getstarted.html
data("World")Joining the two dataframes together
suicidedata_eda_map <- left_join(World,
suicidedata_eda_formap %>% mutate(across(where(is.numeric), round, 2)),
by = c("iso_a3" = "code")) %>%
select(!c(2,3,4,6,7,8,9,10,11,12,13,14,15)) %>%
mutate(area = as.numeric(str_remove(`area`,
" \\[km\\^2\\]")),
.after = region) %>%
na.omit()Creating a function to plot
plot_map_eda <- function(year, metric = "SR", gender = "T", style = "jenks"){
metric_text = case_when(metric == "SR" ~ "Suicide rate",
metric == "SP" ~ "Share of deaths from suicide (%)",
metric == "SN" ~ "Number of suicide",
metric == "DN" ~ "Number of deaths",
metric == "DR" ~ "Mortality rate")
age = case_when(metric == "SR" ~ "AS",
metric == "SP" ~ "All",
metric == "SN" ~ "All",
metric == "DN" ~ "All",
metric == "DR" ~ "All")
gender_text = case_when(gender == "T" ~ "Total",
gender == "M" ~ "Male",
gender == "F" ~ "Female")
tmap_mode("view")
tm_shape(suicidedata_eda_map |>
filter(year == year))+
tm_fill(paste0(metric,"_",age,"_",gender),
style = style,
palette="YlOrBr",
id = "country",
title = paste0(metric_text, ", ",gender_text,", ", year),
popup.vars = c(value = paste0(metric,"_",age,"_",gender))) +
tm_borders(col = "grey20",
alpha = 0.5)
}plot_map_eda(2016, "SR", "M", "jenks")